Take-Home Exercise 3

Putting Visual Analytics into Practical Use

Ong Zhi Rong Jordan https://example.com/norajones
2022-05-15
packages = c('tidyverse', 'ggrepel', 'ggiraph', 'patchwork', 'plotly', 'hrbrthemes', 
             'heatmaply', 'viridis','ggridges','binr', "trelliscopejs","zoo","ggthemes")

for(p in packages){
  if(!require(p, character.only = T)){
    install.packages(p)
  }
  library(p, character.only = T)
}
financeJ <- readRDS(file = "data/financeJ.rds")
participantsD <- readRDS(file = "data/participantsD.rds")

head (financeJ)
# A tibble: 6 × 4
  participantId timestamp           category  amount
          <dbl> <dttm>              <chr>      <dbl>
1             0 2022-03-01 00:00:00 Wage      2473. 
2             0 2022-03-01 00:00:00 Shelter   -555. 
3             0 2022-03-01 00:00:00 Education  -38.0
4             1 2022-03-01 00:00:00 Wage      2047. 
5             1 2022-03-01 00:00:00 Shelter   -555. 
6             1 2022-03-01 00:00:00 Education  -38.0
head (participantsD)
# A tibble: 6 × 7
  participantId householdSize haveKids   age educationLevel     
          <dbl>         <dbl> <lgl>    <dbl> <chr>              
1             0             3 TRUE        36 HighSchoolOrCollege
2             1             3 TRUE        25 HighSchoolOrCollege
3             2             3 TRUE        35 HighSchoolOrCollege
4             3             3 TRUE        21 HighSchoolOrCollege
5             4             3 TRUE        43 Bachelors          
6             5             3 TRUE        32 HighSchoolOrCollege
# … with 2 more variables: interestGroup <chr>, joviality <dbl>
cuts <- bins(participantsD$age,target.bins = 7,minpts = 120)
cuts$breaks <- bins.getvals(cuts)
cuts$binct
[18, 23] [24, 29] [30, 35] [36, 42] [43, 48] [49, 54] [55, 60] 
     136      137      154      153      136      156      139 
age_group1 <- 18:23 #create a vector of age
age_group2 <- 24:29
age_group3 <- 30:35
age_group4 <- 36:42
age_group5 <- 43:48
age_group6 <- 49:54
age_group7 <- 55:60

participants_final <- participantsD %>%
  mutate (agegroup= case_when(
    age %in% age_group1 ~ "18-23",
    age %in% age_group2 ~ "24-29",
    age %in% age_group3 ~ "30-35",
    age %in% age_group4 ~ "36-42",
    age %in% age_group5 ~ "43-48",
    age %in% age_group6 ~ "49-54",
    age %in% age_group7 ~ "55-60")) %>%
  select (participantId,agegroup)
financeJ$timestamp <- (strptime(financeJ$timestamp , "%Y-%m-%d"))


final <- merge(participants_final,financeJ,by=c("participantId","participantId"),all.x=T)

final <- final %>%
  mutate (Timestamp = as.Date(as.yearmon(timestamp), frac = 0)) %>%
  rename('Participant_ID' = 'participantId')
data_plot <- final %>%
  select(c("Timestamp","Participant_ID", "category","amount")) %>% #choose the columns to subset
  group_by(Timestamp,category,Participant_ID)%>% 
  summarise(amount = round(sum(amount),2)) %>%
  pivot_wider(names_from = "category",values_from = "amount")
data_plot[is.na(data_plot)] = 0
data_plot [3:8] <- abs(data_plot[3:8])

data_plot <- data_plot %>%
  mutate(Expenses = (Education + Food + Recreation + Shelter)) 

trelis_plot <- data_plot %>%
  select(c("Timestamp","Participant_ID","Expenses", "Wage"))
ggplot(trelis_plot, aes(y = Expenses, x = (Timestamp))) +
  geom_point() + geom_line() +
  labs (y = "Total Expenses \n of the Month", x = 'Month', title = "Monthly Expenditure of Participant") + 
  scale_x_date(date_breaks = "months",
               date_labels = "%b") +
  facet_trelliscope(~ Participant_ID, nrow = 2, ncol = 2, width = 800, as_plotly = TRUE, path = "trellis/", self_contained = TRUE)
ggplot(trelis_plot, aes(y = Wage, x = (Timestamp))) +
  geom_point() + geom_line() +
  labs (y = "Total Wage \n of the Month", x = 'Month', title = "Monthly Wage of Participant") + 
  scale_x_date(date_breaks = "months",
               date_labels = "%b") +
  facet_trelliscope(~ Participant_ID, nrow = 2, ncol = 2, width = 800, as_plotly = TRUE, path = "trellis1/")
histo_plot <- final %>%
  select(c("Participant_ID","category","amount","agegroup")) %>%
  group_by(Participant_ID,category,agegroup) %>%
  summarise (amount = round(sum(amount),2)) %>%
  pivot_wider(names_from = "category",values_from = "amount")
histo_plot[is.na(histo_plot)] = 0
histo_plot [3:8] <- abs(histo_plot[3:8])


histo_plot <- histo_plot %>%
  mutate(Expenses = Education + Food + Recreation + Shelter) %>%
  mutate(Income = RentAdjustment + Wage) %>%
  mutate (Savings = Income - Expenses) %>%
  mutate (PctSavings = round((Savings/Income) *100,0)) %>%
  mutate(PctNeedsExpenses = round(((Education+Food+Shelter)/Income) *100,0))
p <- ggplot(data=histo_plot, 
            aes(x = PctSavings)) +
  geom_vline(aes(xintercept = 20.5), color="red", linetype = "dashed", alpha = 0.3) +
  annotate ("text", x=9, y = 0.5, label = "Participants with less than \n20% of Savings", size = 2.5, angle = 0,) +
  geom_dotplot_interactive(              
    aes(tooltip = agegroup,
        data_id = agegroup),
    stackgroups = TRUE,                  
    binwidth = 1,                        
    method = "histodot") +  
  scale_y_continuous(NULL, breaks = NULL) +
  scale_x_continuous(labels = c("0","20","40","60", "80", "100"),breaks = c(0,20,40,60,80,100)) +
  theme_few() + labs (title = "Participants Savings in Percentage", x = "Savings (%)")


p2 <- ggplot(data=histo_plot, 
            aes(x = PctNeedsExpenses)) +
  geom_vline(aes(xintercept = 49.5), color="blue", linetype = "dashed", alpha = 0.3) +
    annotate ("text", x=70, y = 0.5, label = "Participants that spend more than \n50% of income on Needs", size = 2.5, angle = 0,) +
  geom_dotplot_interactive(              
    aes(tooltip = agegroup,
        data_id = agegroup),
    stackgroups = TRUE,                  
    binwidth = 1,                        
    method = "histodot") +  
  scale_y_continuous(NULL, breaks = NULL) +
  scale_x_continuous(labels = c("0","20","40","60", "80", "100"),breaks = c(0,20,40,60,80,100), limits = c(0,100)) +
  theme_few() + labs (title = "Participants Expenses (Needs) in Percentage", x = "Expenses (%)")


girafe(                               
  code = print (p/p2),                             
  width_svg = 8,                         
  height_svg = 8)